# Hyperparameter settings as a flat "key = value" list (no [section] header).
# NOTE(review): the code that reads this file is not visible here; every
# meaning below marked "presumably" is inferred from the key name only and
# should be confirmed against the reader.
# NOTE(review): purpose and unit are unclear from this file alone - confirm.
deadline = 5
# Presumably the number of distinct tokens in the vocabulary - confirm.
vocab_size = 45795
# Presumably the (maximum) length of one input sample - TODO confirm the unit.
data_len = 180
# Presumably the number of samples per batch - confirm.
batch_size = 128
# Presumably the dimension of each embedding vector - confirm.
embedding_size = 64
# NOTE(review): key is spelled "hiddin_size" (likely meant "hidden_size").
# Keep this spelling unless the reader is changed at the same time.
# The value equals nb_head * size_per_head (4 * 32 = 128).
hiddin_size = 128
# NOTE(review): key is spelled "learnning_rate" (likely meant "learning_rate").
# Keep this spelling unless the reader is changed at the same time.
learnning_rate = 0.01
# Presumably the number of attention heads - confirm.
nb_head = 4
# Presumably the width of each head; nb_head * size_per_head = 128 matches
# hiddin_size above, so keep the three values consistent when editing.
size_per_head = 32
# Presumably the number of stacked layers in the model - confirm.
nb_layers = 3